package baiduCheck

import (
	"github.com/astaxie/beego/httplib"
	"github.com/astaxie/beego"
	"regexp"
	"github.com/PuerkitoBio/goquery"
	"strings"
	url2 "net/url"
	"publish/pub-lib/baijia"
	"errors"
)

// BaiduCheck is the checker type; its Check method runs the checks that are
// switched on in Setting.
type BaiduCheck struct {
	// Setting selects which individual checks Check performs.
	Setting Setting
}

// Setting holds one on/off switch per check performed by BaiduCheck.Check.
// A false field means the corresponding check is skipped.
type Setting struct {
	IsCheckTitleLenght bool // title length check (the "Lenght" spelling is part of the exported API)
	IsCheckTitle       bool // title search check
	IsCheckAuthor      bool // author search check
	IsCheckContent     bool // paragraph content search check
}

// DefaultCheck is a ready-to-use checker with every check enabled.
//
// It is initialised in its declaration rather than in an init function, so it
// already carries these values while other package-level variables are being
// initialised.
var DefaultCheck = BaiduCheck{
	Setting: Setting{
		IsCheckTitleLenght: true,
		IsCheckTitle:       true,
		IsCheckAuthor:      true,
		IsCheckContent:     true,
	},
}


// Check fetches the article at url via PraseArticle and runs every check that
// is enabled in b.Setting, in this order: title length, title search, author
// search, paragraph content search.
//
// It returns true when the article passes (no enabled check objected) and
// false when parsing fails or the first enabled check fails. The author check
// is skipped when auther is empty.
func (b *BaiduCheck) Check(auther string, url string) bool {
	// Accepted title length, counted in runes (inclusive bounds).
	const (
		minTitleRunes = 8
		maxTitleRunes = 30
	)

	article, err := PraseArticle(url)
	if err != nil {
		beego.Error("解析失败:", err.Error())
		return false
	}

	titleRunes := len([]rune(article.Title))

	// Cases are evaluated top to bottom and stop at the first one that is
	// true, so a later (network-backed) check only runs when all earlier
	// ones passed or were disabled.
	switch {
	case b.Setting.IsCheckTitleLenght && (titleRunes < minTitleRunes || titleRunes > maxTitleRunes):
		beego.Warning("标题长度检查 【未通过】:", url)
		return false
	case b.Setting.IsCheckTitle && titleCheck(article.Title):
		beego.Warning("标题检查 【未通过】:", url)
		return false
	case b.Setting.IsCheckAuthor && auther != "" && authorCheck(auther):
		beego.Warning("作者检查 【未通过】:", url)
		return false
	case b.Setting.IsCheckContent && contentByUrl(article):
		beego.Warning("段落内容检查 【未通过】:", url)
		return false
	}

	beego.Info("Baidu检查通过 :", url)
	return true
}

// PraseArticle asks the local feed service (127.0.0.1:3333) to parse the
// article at articleUrl and decodes the JSON reply into a baijia.FeedArticle.
//
// It returns the request/decoding error as-is, or a dedicated error when the
// decoded article has an empty title or no feed entries. On error the returned
// article is nil.
func PraseArticle(articleUrl string) (*baijia.FeedArticle, error) {
	endpoint := `http://127.0.0.1:3333/article/feed?url=` + url2.QueryEscape(articleUrl)

	parsed := new(baijia.FeedArticle)
	err := httplib.Get(endpoint).ToJSON(parsed)
	if err != nil {
		return nil, err
	}

	// A reply without a title or without any feed entry is treated as a failed parse.
	if parsed.Title == "" || len(parsed.Feed) == 0 {
		return nil, errors.New("解析内容为空：" + endpoint)
	}
	return parsed, nil
}


// titleCheck reports whether the title word shows up as a full match in the
// Baidu search results for that title.
//
// Two places are inspected, both compared after remotePoint normalisation:
// the concatenated <em> text inside each ".result .t a" element, and the text
// of each ".c-abstract" element. If the search page cannot be fetched the
// error is logged and true is returned, so a fetch failure counts as a match.
func titleCheck(word string) bool {
	doc, err := goquery.NewDocument(`http://www.baidu.com/s?wd=` + url2.QueryEscape(word))
	if err != nil {
		beego.Error(err.Error())
		return true
	}

	needle := remotePoint(word)
	matched := false

	// scan walks every element matching selector and records a match when the
	// normalised text produced by textOf contains the normalised title. Once a
	// match has been recorded the remaining elements are ignored.
	scan := func(selector string, textOf func(*goquery.Selection) string) {
		doc.Find(selector).Each(func(_ int, node *goquery.Selection) {
			if matched {
				return
			}
			matched = strings.Contains(remotePoint(textOf(node)), needle)
		})
	}

	scan(".result .t a", func(link *goquery.Selection) string {
		return link.Find("em").Text()
	})
	scan(".c-abstract", func(abstract *goquery.Selection) string {
		return abstract.Text()
	})

	return matched
}

// authorCheck reports whether the account name word appears to exist on
// Baijiahao, judged from a Baidu search restricted to site:baijiahao.baidu.com.
//
// It returns true when a ".result .t a" element's text equals word or
// word+"-百家号", or when any ".c-showurl" element's text contains
// "baijiahao.baidu.com". If the search page cannot be fetched the error is
// logged and false is returned.
func authorCheck(word string) bool {
	query := url2.QueryEscape(word + ` site:baijiahao.baidu.com`)
	doc, err := goquery.NewDocument(`http://www.baidu.com/s?wd=` + query)
	if err != nil {
		beego.Error(err.Error())
		return false
	}

	exists := false

	// Exact match on the element text, with or without the "-百家号" suffix.
	doc.Find(".result .t a").Each(func(_ int, link *goquery.Selection) {
		switch link.Text() {
		case word + "-百家号", word:
			exists = true
		}
	})

	// Otherwise fall back to the displayed URL of each result.
	doc.Find(".c-showurl").Each(func(_ int, shown *goquery.Selection) {
		if exists {
			return
		}
		exists = strings.Contains(shown.Text(), "baijiahao.baidu.com")
	})

	return exists
}

func contentCheck(word string) bool {

	doc, err := goquery.NewDocument(`http://www.baidu.com/s?wd=` + url2.QueryEscape(word))
	if err != nil {
		beego.Error(err.Error())
		return false
	}
	find := false
	doc.Find(".c-abstract").Each(func(i int, sel *goquery.Selection) {
		if !find {
			emWord := sel.Text()
			if strings.Index(remotePoint(emWord), remotePoint(word)) >= 0 {
				find = true
			}
		}
	})

	return find
}

// contentByUrl takes the first five "text" feed entries of article, truncates
// each to at most 30 runes, and looks the snippet up with contentCheck.
//
// It returns true as soon as one snippet is found, and false when none of the
// inspected snippets is found. Entries whose Type is not "text" are skipped
// and do not count towards the five.
func contentByUrl(article *baijia.FeedArticle) bool {
	const (
		maxParagraphs = 5  // number of text entries to inspect
		maxRunes      = 30 // snippet length, in runes, sent to the search
	)

	checked := 0
	for _, part := range article.Feed {
		if checked >= maxParagraphs {
			break
		}
		if part.Type != "text" {
			continue
		}

		snippet := []rune(part.Data)
		if len(snippet) > maxRunes {
			snippet = snippet[:maxRunes]
		}
		if contentCheck(string(snippet)) {
			return true
		}
		checked++
	}

	return false
}

// article is a JSON-mapped article shape: a title plus a list of feed entries.
// NOTE(review): not referenced by any function in the visible part of this
// file (the checks use baijia.FeedArticle) — confirm whether it is still needed.
type article struct {
	Title string `json:"title"` // JSON key "title"
	Feeds []feed `json:"feeds"` // JSON key "feeds"
}
// feed is one entry of article.Feeds, mapped from JSON keys "data" and "type".
// NOTE(review): presumably mirrors the entries of baijia.FeedArticle.Feed,
// whose Type is compared against "text" elsewhere in this file — confirm.
type feed struct {
	Data string `json:"data"` // JSON key "data"
	Type string `json:"type"` // JSON key "type"
}

// punctAndSymbolRe matches runs of Unicode punctuation (\pP) and symbol (\pS)
// characters. It is compiled once at package level instead of on every
// remotePoint call, which happens inside per-search-result loops.
var punctAndSymbolRe = regexp.MustCompile(`[\pP\pS]+`)

// remotePoint returns word with every Unicode punctuation and symbol character
// removed. All other characters, including whitespace, are kept unchanged.
func remotePoint(word string) string {
	return punctAndSymbolRe.ReplaceAllString(word, "")
}